First, let’s bring in the data and munge it appropriately.
# Set working directory ----
# Left commented out deliberately; paths below are project-relative.
#setwd("/Users/bfoster/Desktop/2017-edc/science-fairs-analyses")
# Load packages ----
# Bootstrap pacman once, then let it install/attach everything else.
# FIX: 'scales' and 'tidyverse' were each listed twice in the original call.
if (!require("pacman")) install.packages("pacman")
pacman::p_load(psych, ggplot2, ggalt, ggthemes,
               readr, dplyr, knitr, scales, pander, kableExtra, stringr,
               mirt, ltm, tidyverse, formattable, gridExtra, broom, lavaan)
# Import the data ----
joined.dat <- readRDS(file = "../data/joined.dat")
# Munge data ----
# Keep StudentID plus the 14 pre-administration SEP items.
items <- joined.dat %>%
  dplyr::select(StudentID, s_preSEP_01, s_preSEP_02, s_preSEP_03,
                s_preSEP_04, s_preSEP_05, s_preSEP_06,
                s_preSEP_07, s_preSEP_08, s_preSEP_09,
                s_preSEP_10, s_preSEP_11, s_preSEP_12,
                s_preSEP_13, s_preSEP_14) %>%
  # FIX: the original used the deprecated rename_(.dots = ...) SE verb.
  # NOTE(review): the pattern "s_postInt_" does not occur in these column
  # names, so this rename is a no-op (columns keep their s_preSEP_ prefix,
  # as the downstream tables confirm). The pattern is preserved to keep
  # behavior identical -- confirm whether "s_preSEP_" was intended.
  setNames(gsub("s_postInt_", "", names(.)))
# Lookup table pairing each item name with its plot reference number (1-14).
item.ref <- tibble(
  Item = colnames(items)[-1],
  Number = seq_along(Item))
The syntax below creates the item descriptive statistics using the ltm package, and conducts all necessary munging for printing tables and plots.
# Item descriptive statistics from the 'ltm' package (B = 1000 bootstrap
# samples for the pairwise chi-squared tests).
pre.items.descriptives <- descript(items[-1], chi.squared = TRUE,
B = 1000)
# Extract the proportions in each response category.
# NOTE(review): [2] indexes the descript object positionally -- presumably
# the category-proportion component; confirm with str(pre.items.descriptives)
# if ltm's object layout ever changes.
pre.per <- do.call(rbind, lapply((pre.items.descriptives[2]), data.frame,
stringsAsFactors=FALSE)) %>%
mutate(item = colnames(items)[-1]) %>%
# X0/X1 are the auto-generated names for the 0 (wrong) and 1 (correct) columns
rename(Wrong = X0, Correct = X1) %>%
dplyr::select(item, Wrong, Correct)
# Convert to long format (one row per item x category) for plotting.
pre.per.long <- gather(pre.per, cat, value, -item) %>%
arrange(item)
Let’s look at the percent of missing responses for each item. A color bar has been added to the values in the table to compare the relative proportion missing per each item. Nothing looks alarming.
# Extract the percent of missing responses per item.
# NOTE(review): [7] indexes the descript object positionally -- presumably
# the missingness summary; confirm with str(pre.items.descriptives).
do.call(rbind, lapply((pre.items.descriptives[7]), data.frame,
                      stringsAsFactors=FALSE)) %>%
  rownames_to_column("Statistic") %>%
  # "missin.(%)" is the literal row label produced by ltm::descript
  filter(Statistic=="missin.(%)") %>%
  gather(item, value, -Statistic) %>%
  dplyr::select(item, value) %>%
  rename(Percent = value, Item = item) %>%
  # FIX: Percent is already on the 0-100 scale; the original
  # (Percent/100)*100 round-trip was a no-op and is removed.
  mutate("Percent Missing" = color_bar("lightgreen")(Percent)) %>%
  dplyr::select(Item, "Percent Missing") %>%
  # FIX: caption previously said "Category Utilization" (copy-paste from the
  # proportions table); this table shows percent missing.
  kable(digits = 2, format="html", caption="Percent Missing Responses for pre-Administration
Period", escape = F) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
| Item | Percent Missing |
|---|---|
| s_preSEP_01 | 1.048218 |
| s_preSEP_02 | 2.306080 |
| s_preSEP_03 | 3.773585 |
| s_preSEP_04 | 3.144654 |
| s_preSEP_05 | 2.515723 |
| s_preSEP_06 | 5.870021 |
| s_preSEP_07 | 4.612159 |
| s_preSEP_08 | 3.354298 |
| s_preSEP_09 | 5.031447 |
| s_preSEP_10 | 5.241090 |
| s_preSEP_11 | 5.450734 |
| s_preSEP_12 | 6.708595 |
| s_preSEP_13 | 5.870021 |
| s_preSEP_14 | 6.498952 |
Let’s look at the table of the proportions correct for each item in order to see if anything looks aberrant. It’s worth noting that for many items, almost 70% of students are providing the correct answer.
# Render the proportion wrong/correct table for the pre-administration items.
pre.per %>%
  rename(Item = item) %>%
  kable(
    digits = 3, format = "html",
    caption = "Category Utilization for Pre-Administration
Period"
  ) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
| Item | Wrong | Correct |
|---|---|---|
| s_preSEP_01 | 0.328 | 0.672 |
| s_preSEP_02 | 0.582 | 0.418 |
| s_preSEP_03 | 0.386 | 0.614 |
| s_preSEP_04 | 0.686 | 0.314 |
| s_preSEP_05 | 0.368 | 0.632 |
| s_preSEP_06 | 0.327 | 0.673 |
| s_preSEP_07 | 0.549 | 0.451 |
| s_preSEP_08 | 0.616 | 0.384 |
| s_preSEP_09 | 0.554 | 0.446 |
| s_preSEP_10 | 0.416 | 0.584 |
| s_preSEP_11 | 0.370 | 0.630 |
| s_preSEP_12 | 0.807 | 0.193 |
| s_preSEP_13 | 0.470 | 0.530 |
| s_preSEP_14 | 0.312 | 0.688 |
A visualization is provided for another perspective to examine proportion correct. Note, that I’ve added a horizontal line at .50 for reference.
# Stacked-bar chart of wrong/correct proportions per item; the horizontal
# line at .5 gives a visual reference for "half the students correct".
p_pl1_prop <- ggplot(pre.per.long, aes(x = item, y = value, fill = cat)) +
  geom_bar(stat = "identity") +
  ggtitle("Proportion Correct of SEP Items") +
  labs(x="Items", y="Proportion of Response Option", fill="Category") +
  scale_fill_ptol() +
  theme_minimal() +
  geom_hline(yintercept = .5) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
p_pl1_prop
A histogram of the total score is provided to examine whether the total score for the measure is normally distributed with no obvious ceiling or floor effects.
# Total (sum) score per student. rowSums() without na.rm = TRUE means any
# student with a missing item gets NA here, and those rows are silently
# dropped by histogram() -- presumably intentional; TODO confirm.
total <- rowSums(items[-1])
# histogram() is lattice's formula interface -- presumably on the search
# path as a side effect of attaching mirt; confirm if this errors.
histogram(~total, breaks=10)
The syntax below fits both the Rasch model, and the 2 & 3-PL models for dichotomous data. Standard errors are calculated based on the sandwich covariance estimate, which was chosen to adjust for nesting in the data. Results for the best fitting model (i.e., lowest sample adjusted BIC), indicated that the 2-PL model should be utilized in subsequent model testing.
# Define the number of cores for quicker processing ----
# FIX: the original hard-coded 5 cores; cap at 5 but never request more
# cores than the machine actually has (detectCores can return NA).
n.cores <- parallel::detectCores()
if (is.na(n.cores)) n.cores <- 1L
mirtCluster(min(5L, n.cores))
# Drop rows with any missing item; the IRT models are fit on complete cases.
items.noNA <- na.omit(items)
# Fit the Rasch (1-PL) model. Sandwich standard errors are used to adjust
# for nesting in the data; NCYCLES raises the EM iteration ceiling.
set.seed(8675309)
model_1pl_rasch <- mirt(
  items.noNA[-1],
  model = 1,
  itemtype = 'Rasch',
  SE = TRUE,
  SE.type = 'sandwich',
  technical = list(removeEmptyRows=TRUE, parallel=TRUE, NCYCLES = 8000),
  verbose = FALSE
)
# Printing the object reports convergence status and fit indices.
model_1pl_rasch
##
## Call:
## mirt(data = items.noNA[-1], model = 1, itemtype = "Rasch", SE = TRUE,
## SE.type = "sandwich", verbose = FALSE, technical = list(removeEmptyRows = TRUE,
## parallel = TRUE, NCYCLES = 8000))
##
## Full-information item factor analysis with 1 factor(s).
## Converged within 1e-04 tolerance after 24 EM iterations.
## mirt version: 1.26.3
## M-step optimizer: L-BFGS-B
## EM acceleration: Ramsay
## Number of rectangular quadrature: 61
##
## Information matrix estimated with method: sandwich
## Condition number of information matrix = 11.8419
## Second-order test: model is a possible local maximum
##
## Log-likelihood = -3511.011
## Estimated parameters: 15
## AIC = 7052.021; AICc = 7053.281
## BIC = 7111.78; SABIC = 7064.185
## G2 (16368) = 2318.96, p = 1
## RMSEA = 0, CFI = NaN, TLI = NaN
# Fit the 2-PL model (free discrimination per item), same settings as the
# Rasch fit: sandwich SEs and a raised EM iteration ceiling.
set.seed(8675309)
model_2pl <- mirt(
  items.noNA[-1],
  model = 1,
  itemtype = '2PL',
  SE = TRUE,
  SE.type = 'sandwich',
  technical = list(removeEmptyRows=TRUE, parallel=TRUE, NCYCLES = 8000),
  verbose = FALSE
)
# Printing the object reports convergence status and fit indices.
model_2pl
##
## Call:
## mirt(data = items.noNA[-1], model = 1, itemtype = "2PL", SE = TRUE,
## SE.type = "sandwich", verbose = FALSE, technical = list(removeEmptyRows = TRUE,
## parallel = TRUE, NCYCLES = 8000))
##
## Full-information item factor analysis with 1 factor(s).
## Converged within 1e-04 tolerance after 14 EM iterations.
## mirt version: 1.26.3
## M-step optimizer: BFGS
## EM acceleration: Ramsay
## Number of rectangular quadrature: 61
##
## Information matrix estimated with method: sandwich
## Condition number of information matrix = 14.78161
## Second-order test: model is a possible local maximum
##
## Log-likelihood = -3474.953
## Estimated parameters: 28
## AIC = 7005.906; AICc = 7010.319
## BIC = 7117.456; SABIC = 7028.612
## G2 (16355) = 2246.84, p = 1
## RMSEA = 0, CFI = NaN, TLI = NaN
# Fit the 3-PL model (adds a guessing parameter per item).
# The information matrix is computationally singular here; the sample is
# small, so we don't worry too much about it.
set.seed(8675309)
model_3pl <- mirt(
  items.noNA[-1],
  model = 1,
  itemtype = '3PL',
  SE = TRUE,
  SE.type = 'sandwich',
  technical = list(removeEmptyRows=TRUE, parallel=TRUE, NCYCLES = 6000),
  verbose = FALSE
)
# Printing the object reports convergence status and fit indices.
model_3pl
##
## Call:
## mirt(data = items.noNA[-1], model = 1, itemtype = "3PL", SE = TRUE,
## SE.type = "sandwich", verbose = FALSE, technical = list(removeEmptyRows = TRUE,
## parallel = TRUE, NCYCLES = 6000))
##
## Full-information item factor analysis with 1 factor(s).
## Converged within 1e-04 tolerance after 912 EM iterations.
## mirt version: 1.26.3
## M-step optimizer: BFGS
## EM acceleration: Ramsay
## Number of rectangular quadrature: 61
##
## Information matrix estimated with method: sandwich
## Condition number of information matrix = 6387.634
## Second-order test: model is a possible local maximum
##
## Log-likelihood = -3466.177
## Estimated parameters: 42
## AIC = 7016.353; AICc = 7026.557
## BIC = 7183.679; SABIC = 7050.411
## G2 (16341) = 2229.29, p = 1
## RMSEA = 0, CFI = NaN, TLI = NaN
# Compare the three fits on sample-size adjusted BIC (lower = better).
mods.SABIC <- tibble(
  key = c("Rasch", "2PL", "3PL"),
  SABIC = c(model_1pl_rasch@Fit$SABIC,
            model_2pl@Fit$SABIC,
            model_3pl@Fit$SABIC)
) %>%
  arrange(SABIC)
# Render the comparison table, best-fitting model first.
mods.SABIC %>%
  kable(digits = 2, format="html", caption="Model Selection With SABIC", escape = F) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
| key | SABIC |
|---|---|
| 2PL | 7028.61 |
| 3PL | 7050.41 |
| Rasch | 7064.18 |
# if needed, log-likelihood test can be provided with the command below
#anova(model_1pl_rasch, model_2pl)
Inspect the parameter (i.e., IRT adjusted), for the best fitting model.
# Item parameters in the traditional IRT parameterization ----
# BUG FIX: the original passed `IRTparms = T`, which is not an argument of
# mirt's coef method (the real name is `IRTpars`); the typo was silently
# swallowed by `...`, so the table showed slope/intercept values (a, d)
# while labeling the intercept d as "difficulty". With IRTpars = TRUE the
# simplified output columns are the discrimination a and difficulty b
# (b = -d/a), matching the column labels below.
as.data.frame(coef(model_2pl, IRTpars = TRUE, simplify = TRUE)) %>%
  rename(discrimination = items.a,
         difficulty = items.b) %>%
  mutate(Item = colnames(items)[-1]) %>%
  dplyr::select(Item, discrimination, difficulty) %>%
  #arrange(-discrimination) %>%
  kable(digits = 2, format="html", caption="Item IRT Parameters", escape = F) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
| Item | discrimination | difficulty |
|---|---|---|
| s_preSEP_01 | 1.01 | 0.96 |
| s_preSEP_02 | 0.61 | -0.41 |
| s_preSEP_03 | 0.44 | 0.53 |
| s_preSEP_04 | 0.59 | -0.80 |
| s_preSEP_05 | 0.37 | 0.64 |
| s_preSEP_06 | 0.63 | 0.78 |
| s_preSEP_07 | 0.49 | -0.19 |
| s_preSEP_08 | 0.32 | -0.52 |
| s_preSEP_09 | 0.83 | -0.24 |
| s_preSEP_10 | 1.44 | 0.54 |
| s_preSEP_11 | 0.97 | 0.76 |
| s_preSEP_12 | -0.22 | -1.44 |
| s_preSEP_13 | 1.39 | 0.23 |
| s_preSEP_14 | 0.55 | 0.88 |
#mirt:::mirt2traditional(model_2pl)
Next, the category response curves are examined. We’re looking for two things: 1) the location of the item along the ability scale (i.e., difficulty), and 2) how well an item can differentiate among examinees who have abilities above and below the item location (i.e., the discrimination parameter). The steeper the curve, the better it can discriminate. Flatter curves indicate an almost equal probability of getting an item correct at either end of the ability continuum. The plots below indicate several problematic items: 2, 3, 4, 5, 7, 8, and 12. Of these problematic items, item 12 should most certainly be removed from the analyses, as it has a negative discrimination parameter, which yields a monotonically decreasing item response function. This result indicates that people with high ability have a lower probability of responding correctly than people of low ability. The best discriminating items are: 1, 2, 10, 11, and 13, as these all exhibit the steepest curves.
Reference table for CRC plots:
# Reference table mapping item names to the numbers used in the CRC plots.
kable(item.ref, digits = 2, format = "html",
      caption = "Item Labels and Reference Numbers", escape = F) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
| Item | Number |
|---|---|
| s_preSEP_01 | 1 |
| s_preSEP_02 | 2 |
| s_preSEP_03 | 3 |
| s_preSEP_04 | 4 |
| s_preSEP_05 | 5 |
| s_preSEP_06 | 6 |
| s_preSEP_07 | 7 |
| s_preSEP_08 | 8 |
| s_preSEP_09 | 9 |
| s_preSEP_10 | 10 |
| s_preSEP_11 | 11 |
| s_preSEP_12 | 12 |
| s_preSEP_13 | 13 |
| s_preSEP_14 | 14 |
Generate CRCs for each item:
# Helper: combined category response curve + information plot for one item.
plotCRC <- function(item) {
  itemplot(model_2pl, item, type = 'infotrace')
}
# Generate one plot per item, iterating over the item names.
lapply(colnames(items)[-1], plotCRC)
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
##
## [[11]]
##
## [[12]]
##
## [[13]]
##
## [[14]]
Plot CRCs in same frame so that difficulty of items can be visually examined:
# Overlay all 14 category response curves in a single lattice panel so item
# locations (difficulty) can be compared visually.
plot(model_2pl, type = 'trace', which.items = 1:14, facet_items=FALSE)
Global Fit: How well do these models fit the data, and do the items appear to behave well given the selected itemtypes? The M2() function is used to assess global model fit. Overall, the model fits the data well.
Criteria for evaluating overall model fit:
RMSEA <= .05
SRMR <= .05
Non-significant M2 statistic
TLI and CFI >= .90
# Global fit ---
# M2 limited-information statistic with RMSEA/SRMSR/TLI/CFI;
# impute = 100 handles the missing responses.
M2_model_2pl <- M2(model_2pl, impute = 100, residmat = FALSE)
# Render the global fit table.
kable(M2_model_2pl, digits = 2, format = "html",
      caption = "Global Fit Statistics", escape = F) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
| M2 | df | p | RMSEA | RMSEA_5 | RMSEA_95 | SRMSR | TLI | CFI | |
|---|---|---|---|---|---|---|---|---|---|
| stats | 98.93 | 77 | 0.05 | 0.03 | 0 | 0.04 | 0.05 | 0.92 | 0.93 |
Item Fit: The itemfit() function provides a piece-wise assessment of item fit using Orlando and Thissen's (2000) S_X2 statistic. An alpha of <= .01 is typically used to indicate misfitting items. Results hint at some issues with potentially misfitting items (i.e., 10, 13, and 3), but none of them reach the critical alpha value. Followup analyses could be conducted using itemGAM() to estimate the response curves for these items to hypothesize as to why these items are on the fringe of misfitting.
# Piecewise misfit ---
# S_X2 item fit statistics (Orlando & Thissen, 2000); impute = 100 handles
# the missing responses.
itemfit_2pl <- itemfit(model_2pl, impute = 100)
# Optionally apply a false-discovery-rate correction to the p-values:
# p.fdr <- p.adjust(itemfit_2pl$p.S_X2,"BH")
# itemfit_2pl <- cbind(itemfit_2pl, p.fdr) # bind to previous work
# Sort the item fit statistics by p-value and render.
# NOTE(review): slice(1:14) presumably keeps the 14 per-item rows -- confirm
# itemfit() returns exactly one row per item here.
itemfit_2pl %>%
slice(1:14) %>%
arrange(p.S_X2) %>%
kable(digits = 2, format="html", caption="Item Fit Statistics", escape = F) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
| item | S_X2 | df.S_X2 | p.S_X2 |
|---|---|---|---|
| s_preSEP_13 | 12.71 | 7 | 0.08 |
| s_preSEP_10 | 11.36 | 7 | 0.12 |
| s_preSEP_03 | 12.58 | 9 | 0.18 |
| s_preSEP_12 | 12.31 | 9 | 0.20 |
| s_preSEP_14 | 10.07 | 8 | 0.26 |
| s_preSEP_11 | 7.57 | 8 | 0.48 |
| s_preSEP_06 | 6.64 | 8 | 0.58 |
| s_preSEP_01 | 6.45 | 8 | 0.60 |
| s_preSEP_08 | 6.21 | 8 | 0.62 |
| s_preSEP_04 | 5.87 | 8 | 0.66 |
| s_preSEP_05 | 6.59 | 9 | 0.68 |
| s_preSEP_02 | 3.98 | 8 | 0.86 |
| s_preSEP_09 | 3.31 | 8 | 0.91 |
| s_preSEP_07 | 2.23 | 8 | 0.97 |
# item GAM ----
# items.na <- na.omit(items)
# model_2pl.na <- mirt(items.na[-1], 1,
# itemtype = '2PL',
# technical = list(removeEmptyRows=TRUE, parallel=TRUE, NCYCLES = 8000),
# SE = TRUE, SE.type = 'sandwich',
# verbose = FALSE)
# colnames(items.na)
# Theta <- fscores(model_2pl.na, method = "MAP", full.scores = TRUE)
# IG12 <- itemGAM(items.na[,13], Theta)
# summary(IG12)
# plot(IG12)
Local dependence: Yen's index of local dependence Q3 is provided. Q3 is a pairwise index of correlation of the residuals from the IRT model. If some sets of items present a significant level of residual correlation, then those items can be considered as locally dependent (Yen, 1993). Q3 statistics >= .2 are automatically flagged. The Q3 statistic index indicated some local dependence between item 13 and items 1 and 10. Recall from the analysis of item fit that items 10 and 13 were potentially problematic. In terms of the IRT estimates this could present some issues. Some studies have shown that if negative local dependence is not modeled, the discrimination parameters of the interdependent items are underestimated. Research also shows that the discrimination parameter (aj) depends on the difficulty of the item it interacts with (i.e., is locally dependent with), but not on the difficulty of the item itself. There are several ways to deal with this dependency: 1) model it as a testlet, or 2) incorporate it as a parameter into the model; all of which are quite time consuming and warrant their own set of analyses. A further complication with this measure is that only 2 items exhibit local dependence. To model this in mirt would require the analyst to specify a separate factor for the locally dependent items; in this case, because there are only two locally dependent items, the slopes for these would need to be constrained equal. A separate theta score for the conditionally dependent items would be generated, and because there are only two items you'd likely observe this score to behave erratically. If elimination of the items is not possible, then you just need to accept that the discrimination parameters for these two items are underestimated, and not take too much stock in the difficulty estimates (i.e., if you observe these two items represent either the floor or the ceiling of the measure I'd worry about including them in the analysis).
Example of the constrained model statement:
mod.statement <- ' THETA = s_preSEP_01, s_preSEP_02, s_preSEP_03, s_preSEP_04, s_preSEP_05, s_preSEP_06, s_preSEP_07, s_preSEP_08, s_preSEP_09, s_preSEP_10, s_preSEP_11, s_preSEP_12, s_preSEP_13, s_preSEP_14
# two items exhibiting local dependence (should these two items not be included in the THETA call above?) RESID.THETA = s_preSEP_10, s_preSEP_13
# constrain the two slopes to identify the construct for the factor with local dependence CONSTRAIN = (s_preSEP_10, s_preSEP_13, a2)
COV = THETA*THETA '
# Yen's Q3 pairwise residual correlations; suppress = .19 hides pairs just
# below the conventional |Q3| >= .2 local-dependence flag described above.
tidy(residuals(model_2pl, type = 'Q3', method = 'ML', suppress = .19))
## Q3 matrix:
## .rownames s_preSEP_01 s_preSEP_02 s_preSEP_03 s_preSEP_04 s_preSEP_05
## 1 s_preSEP_01 1 NA NA NA NA
## 2 s_preSEP_02 NA 1 NA NA NA
## 3 s_preSEP_03 NA NA 1 NA NA
## 4 s_preSEP_04 NA NA NA 1 NA
## 5 s_preSEP_05 NA NA NA NA 1
## 6 s_preSEP_06 NA NA NA NA NA
## 7 s_preSEP_07 NA NA NA NA NA
## 8 s_preSEP_08 NA NA NA NA NA
## 9 s_preSEP_09 NA NA NA NA NA
## 10 s_preSEP_10 NA NA NA NA NA
## 11 s_preSEP_11 NA NA NA NA NA
## 12 s_preSEP_12 NA NA NA NA NA
## 13 s_preSEP_13 NA NA NA NA NA
## 14 s_preSEP_14 NA NA NA NA NA
## s_preSEP_06 s_preSEP_07 s_preSEP_08 s_preSEP_09 s_preSEP_10 s_preSEP_11
## 1 NA NA NA NA NA NA
## 2 NA NA NA NA NA NA
## 3 NA NA NA NA NA NA
## 4 NA NA NA NA NA NA
## 5 NA NA NA NA NA NA
## 6 1 NA NA NA NA NA
## 7 NA 1 NA NA NA NA
## 8 NA NA 1 NA NA NA
## 9 NA NA NA 1 NA NA
## 10 NA NA NA NA 1 NA
## 11 NA NA NA NA NA 1
## 12 NA NA NA NA NA NA
## 13 NA NA NA NA NA NA
## 14 NA NA NA NA NA NA
## s_preSEP_12 s_preSEP_13 s_preSEP_14
## 1 NA -0.2361825 NA
## 2 NA NA NA
## 3 NA NA NA
## 4 NA NA NA
## 5 NA NA NA
## 6 NA NA NA
## 7 NA NA NA
## 8 NA NA NA
## 9 NA NA NA
## 10 NA -0.3258808 NA
## 11 NA NA NA
## 12 1 NA NA
## 13 NA 1.0000000 NA
## 14 NA NA 1
Unidimensionality: A CFA is carried out to test the assumption that the measure is unidimensional. Fit statistics for the measure look OK, except that the parameter estimate for the factor loading of item 12 is negative. This item was problematic in the IRT analyses above, as it displayed a negative discrimination parameter. It is an obvious candidate to remove from follow-up analyses.
# Unidimensional CFA: all 14 items load on a single factor.
# std.lv = TRUE identifies the model by fixing the factor variance to 1;
# the `ordered` argument declares every item as ordered-categorical so
# lavaan uses its categorical-data estimator.
model.1 <- '
# measurement model
factor =~ s_preSEP_01 + s_preSEP_02 + s_preSEP_03 +
s_preSEP_04 + s_preSEP_05 + s_preSEP_06 +
s_preSEP_07 + s_preSEP_08 + s_preSEP_09 +
s_preSEP_10 + s_preSEP_11 + s_preSEP_12 +
s_preSEP_13 + s_preSEP_14
'
fit.1 <- cfa(model.1, data=items.noNA, std.lv = TRUE,
ordered = c("s_preSEP_01", "s_preSEP_02", "s_preSEP_03",
"s_preSEP_04", "s_preSEP_05", "s_preSEP_06",
"s_preSEP_07", "s_preSEP_08", "s_preSEP_09",
"s_preSEP_10", "s_preSEP_11", "s_preSEP_12",
"s_preSEP_13", "s_preSEP_14"))
# Print the full set of fit measures (robust/scaled variants included).
fitMeasures(fit.1)
## npar fmin
## 28.000 0.109
## chisq df
## 86.209 77.000
## pvalue chisq.scaled
## 0.221 100.553
## df.scaled pvalue.scaled
## 77.000 0.037
## chisq.scaling.factor baseline.chisq
## 0.939 463.306
## baseline.df baseline.pvalue
## 91.000 0.000
## baseline.chisq.scaled baseline.df.scaled
## 401.374 91.000
## baseline.pvalue.scaled baseline.chisq.scaling.factor
## 0.000 1.200
## cfi tli
## 0.975 0.971
## nnfi rfi
## 0.971 0.780
## nfi pnfi
## 0.814 0.689
## ifi rni
## 0.976 0.975
## cfi.scaled tli.scaled
## 0.924 0.910
## cfi.robust tli.robust
## NA NA
## nnfi.scaled nnfi.robust
## 0.910 NA
## rfi.scaled nfi.scaled
## 0.704 0.749
## ifi.scaled rni.scaled
## 0.749 0.937
## rni.robust rmsea
## NA 0.017
## rmsea.ci.lower rmsea.ci.upper
## 0.000 0.034
## rmsea.pvalue rmsea.scaled
## 1.000 0.028
## rmsea.ci.lower.scaled rmsea.ci.upper.scaled
## 0.007 0.042
## rmsea.pvalue.scaled rmsea.robust
## 0.997 NA
## rmsea.ci.lower.robust rmsea.ci.upper.robust
## NA NA
## rmsea.pvalue.robust rmr
## NA 0.070
## rmr_nomean srmr
## 0.075 0.075
## srmr_bentler srmr_bentler_nomean
## 0.070 0.075
## srmr_bollen srmr_bollen_nomean
## 0.070 0.075
## srmr_mplus srmr_mplus_nomean
## 0.070 0.075
## cn_05 cn_01
## 453.388 500.639
## gfi agfi
## 0.912 0.880
## pgfi mfi
## 0.669 0.988
Results testing a 1-factor solution for the measure, with item 12 removed, also look good.
# Refit the 1-factor CFA dropping item 12 (negative loading above).
model.2 <- '
# measurement model
factor =~ s_preSEP_01 + s_preSEP_02 + s_preSEP_03 +
s_preSEP_04 + s_preSEP_05 + s_preSEP_06 +
s_preSEP_07 + s_preSEP_08 + s_preSEP_09 +
s_preSEP_10 + s_preSEP_11 +
s_preSEP_13 + s_preSEP_14
'
# CONSISTENCY FIX: s_preSEP_12 is no longer in the model, so it is removed
# from the `ordered` declaration too (the original still listed it there).
fit.2 <- cfa(model.2, data=items.noNA, std.lv = TRUE,
ordered = c("s_preSEP_01", "s_preSEP_02", "s_preSEP_03",
"s_preSEP_04", "s_preSEP_05", "s_preSEP_06",
"s_preSEP_07", "s_preSEP_08", "s_preSEP_09",
"s_preSEP_10", "s_preSEP_11",
"s_preSEP_13", "s_preSEP_14"))
# Print the full set of fit measures for the reduced model.
fitMeasures(fit.2)
## npar fmin
## 26.000 0.093
## chisq df
## 73.599 65.000
## pvalue chisq.scaled
## 0.217 86.933
## df.scaled pvalue.scaled
## 65.000 0.036
## chisq.scaling.factor baseline.chisq
## 0.915 446.682
## baseline.df baseline.pvalue
## 78.000 0.000
## baseline.chisq.scaled baseline.df.scaled
## 388.944 78.000
## baseline.pvalue.scaled baseline.chisq.scaling.factor
## 0.000 1.186
## cfi tli
## 0.977 0.972
## nnfi rfi
## 0.972 0.802
## nfi pnfi
## 0.835 0.696
## ifi rni
## 0.977 0.977
## cfi.scaled tli.scaled
## 0.929 0.915
## cfi.robust tli.robust
## NA NA
## nnfi.scaled nnfi.robust
## 0.915 NA
## rfi.scaled nfi.scaled
## 0.732 0.776
## ifi.scaled rni.scaled
## 0.776 0.941
## rni.robust rmsea
## NA 0.018
## rmsea.ci.lower rmsea.ci.upper
## 0.000 0.036
## rmsea.pvalue rmsea.scaled
## 0.999 0.029
## rmsea.ci.lower.scaled rmsea.ci.upper.scaled
## 0.008 0.044
## rmsea.pvalue.scaled rmsea.robust
## 0.991 NA
## rmsea.ci.lower.robust rmsea.ci.upper.robust
## NA NA
## rmsea.pvalue.robust rmr
## NA 0.068
## rmr_nomean srmr
## 0.073 0.073
## srmr_bentler srmr_bentler_nomean
## 0.068 0.073
## srmr_bollen srmr_bollen_nomean
## 0.068 0.073
## srmr_mplus srmr_mplus_nomean
## 0.068 0.073
## cn_05 cn_01
## 457.381 509.042
## gfi agfi
## 0.910 0.874
## pgfi mfi
## 0.650 0.989
# Refit the 2-PL model without item 12 ----
set.seed(8675309)
# IDIOM: drop the single excluded column rather than re-listing the other
# 14 columns by hand; column order is preserved.
items.noNA_no12 <- dplyr::select(items.noNA, -s_preSEP_12)
# Same estimation settings as the full 2-PL fit above.
model_2pl_b <- mirt(items.noNA_no12[-1], 1,
itemtype = '2PL',
technical = list(removeEmptyRows=TRUE, parallel=TRUE, NCYCLES = 8000),
SE = TRUE, SE.type = 'sandwich',
verbose = FALSE)
# check to see that model converged
model_2pl_b
Call: mirt(data = items.noNA_no12[-1], model = 1, itemtype = “2PL”, SE = TRUE, SE.type = “sandwich”, verbose = FALSE, technical = list(removeEmptyRows = TRUE, parallel = TRUE, NCYCLES = 8000))
Full-information item factor analysis with 1 factor(s). Converged within 1e-04 tolerance after 13 EM iterations. mirt version: 1.26.3 M-step optimizer: BFGS EM acceleration: Ramsay Number of rectangular quadrature: 61
Information matrix estimated with method: sandwich Condition number of information matrix = 14.05953 Second-order test: model is a possible local maximum
Log-likelihood = -3280.589 Estimated parameters: 26 AIC = 6613.179; AICc = 6616.973 BIC = 6716.761; SABIC = 6634.262 G2 (8165) = 1893.11, p = 1 RMSEA = 0, CFI = NaN, TLI = NaN
# item parameter statistics ----
# BUG FIX: as in the full-model table above, `IRTparms` is a typo for
# mirt's `IRTpars` and was silently ignored via `...`, so the "difficulty"
# column actually contained the intercept d rather than the IRT difficulty
# b = -d/a. With IRTpars = TRUE the columns are a and b as labeled.
as.data.frame(coef(model_2pl_b, IRTpars = TRUE, simplify = TRUE)) %>%
  rename(discrimination = items.a,
         difficulty = items.b) %>%
  mutate(Item = colnames(items.noNA_no12)[-1]) %>%
  dplyr::select(Item, discrimination, difficulty) %>%
  #arrange(-discrimination) %>%
  kable(digits = 2, format="html", caption="Item IRT Parameters", escape = F) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
| Item | discrimination | difficulty |
|---|---|---|
| s_preSEP_01 | 1.04 | 0.97 |
| s_preSEP_02 | 0.60 | -0.41 |
| s_preSEP_03 | 0.44 | 0.53 |
| s_preSEP_04 | 0.60 | -0.80 |
| s_preSEP_05 | 0.37 | 0.64 |
| s_preSEP_06 | 0.62 | 0.78 |
| s_preSEP_07 | 0.50 | -0.19 |
| s_preSEP_08 | 0.31 | -0.52 |
| s_preSEP_09 | 0.84 | -0.24 |
| s_preSEP_10 | 1.43 | 0.54 |
| s_preSEP_11 | 0.95 | 0.75 |
| s_preSEP_13 | 1.37 | 0.22 |
| s_preSEP_14 | 0.55 | 0.88 |
# item fit statistics ----
# S_X2 item fit for the 13-item refit; impute = 100 handles missingness.
itemfit_2pl_b <- itemfit(model_2pl_b, impute = 100)
# Optionally apply a false-discovery-rate correction to the p-values:
# p.fdr <- p.adjust(itemfit_2pl$p.S_X2,"BH")
# itemfit_2pl <- cbind(itemfit_2pl, p.fdr) # bind to previous work
# Sort the item fit statistics by p-value and render.
# NOTE(review): slice(1:13) presumably keeps the 13 per-item rows -- confirm
# itemfit() returns exactly one row per item here.
itemfit_2pl_b %>%
slice(1:13) %>%
arrange(p.S_X2) %>%
kable(digits = 2, format="html", caption="Item Fit Statistics", escape = F) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
| item | S_X2 | df.S_X2 | p.S_X2 |
|---|---|---|---|
| s_preSEP_03 | 14.07 | 8 | 0.08 |
| s_preSEP_10 | 9.87 | 7 | 0.20 |
| s_preSEP_13 | 9.64 | 7 | 0.21 |
| s_preSEP_05 | 9.27 | 8 | 0.32 |
| s_preSEP_14 | 9.04 | 8 | 0.34 |
| s_preSEP_08 | 9.96 | 9 | 0.35 |
| s_preSEP_06 | 7.66 | 8 | 0.47 |
| s_preSEP_11 | 7.41 | 8 | 0.49 |
| s_preSEP_04 | 6.99 | 8 | 0.54 |
| s_preSEP_09 | 2.83 | 7 | 0.90 |
| s_preSEP_01 | 3.43 | 8 | 0.90 |
| s_preSEP_07 | 3.95 | 9 | 0.91 |
| s_preSEP_02 | 2.29 | 8 | 0.97 |
# Overlay all 13 category response curves of the reduced model in a single
# lattice panel for visual comparison.
plot(model_2pl_b, type = 'trace', which.items = 1:13, facet_items=FALSE)
Examine information, SEM, and reliability for the whole measure. The plots below show that information maximizes around an average ability level (i.e., in the range of -2 to +2), standard errors are lower in this range, and reliability is maximized. It is notable that reliability within this region fails to meet the conventional criterion of >= .70.
# Test information, conditional SEM, and marginal reliability across theta.
# tibble() evaluates columns sequentially, so `information` may reference
# `theta`, and `error`/`reliability` may reference `information`.
# NOTE(review): info_model_2plb is not used again in this section; it may
# feed later analyses -- do not delete without checking downstream code.
info_model_2plb <- tibble(
theta = seq(-6,6,.01),
information = testinfo(model_2pl_b, theta),
error = 1/sqrt(information),
reliability = information/(information+1))
# Plot test information (MI = 1000 -- presumably imputations/replications
# for the confidence envelope; confirm against mirt's plot documentation).
plot(model_2pl_b, type='info', MI=1000)
# Plot the conditional standard error of measurement.
plot(model_2pl_b, type='SE', MI=1000)
# Plot reliability (rxx) across the theta range.
plot(model_2pl_b, type='rxx', MI=1000)
Next, the information curves for each item are examined, looking for overlap in category curves, as well as plateaus in the information curves. Results indicate that most items provide information about students with average ability. Ideally, these information curves should show peaks that are more spread out across the range of the underlying continuum. For example, only items 2, 4, 7, and 8 provide information for students in the above average portion of the latent continuum, though for the same approximate range. This could indicate a lot of redundancy in the item pool. Finally, the scale of the y-axis should be considered in interpreting these plots.
# Reference table mapping item names to numbers, repeated here for
# convenience when reading the information plots.
kable(item.ref, digits = 2, format = "html",
      caption = "Item Labels and Reference Numbers", escape = F) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
| Item | Number |
|---|---|
| s_preSEP_01 | 1 |
| s_preSEP_02 | 2 |
| s_preSEP_03 | 3 |
| s_preSEP_04 | 4 |
| s_preSEP_05 | 5 |
| s_preSEP_06 | 6 |
| s_preSEP_07 | 7 |
| s_preSEP_08 | 8 |
| s_preSEP_09 | 9 |
| s_preSEP_10 | 10 |
| s_preSEP_11 | 11 |
| s_preSEP_12 | 12 |
| s_preSEP_13 | 13 |
| s_preSEP_14 | 14 |
Plot the information curves in one grid for easy comparison.
# NOTE(review): this plots the FULL 14-item model (model_2pl), not the
# 13-item refit (model_2pl_b) used elsewhere in this section -- confirm
# which model the information comparison is meant to show.
plot(model_2pl, type = 'infotrace', which.items = 1:14, facet_items=FALSE)
The plots below indicate the association between the standardized raw scores for the measure and the several different IRT generated MAP scores.
# Factor scores vs Standardized total scores ----
# MAP (maximum a posteriori) factor scores from the reduced 2-PL model.
Theta_PreSep <- fscores(model_2pl_b, method = "MAP", full.scores = TRUE)
# Standardized (z-scored) raw total, and the raw total itself.
# IDIOM: rowSums() replaces apply(x, 1, sum).
STS_PreSep <- as.vector(scale(rowSums(items.noNA_no12[-1])))
TOTAL_PreSep <- rowSums(items.noNA_no12[-1])
# Save the factor scores alongside StudentID; fscores() names its single
# factor column F1, which is renamed here.
pre.sep.theta <- cbind(
  items.noNA_no12[1], Theta_PreSep, TOTAL_PreSep, STS_PreSep
) %>%
  rename(Theta_PreSep = F1)
# write the data
write_csv(pre.sep.theta, "../data/pre.sep.theta.csv")
# IRT MAP scores vs. standardized scores
# BUG FIX: the original mapped x to TOTAL_PreSep (raw totals) while the
# title, axis label, and surrounding text all describe *standardized*
# scores; x now uses STS_PreSep so the plot matches its labels.
p1.map <- ggplot(pre.sep.theta, aes(x=STS_PreSep, y=Theta_PreSep)) +
  geom_point() +
  geom_smooth() +
  theme_minimal() +
  ggtitle("IRT scores vs. Standardized Scores") +
  labs(y="MAP IRT Score", x="Standardized Scores")
p1.map
# Histogram of theta. FIX: reference the column directly inside aes()
# rather than with pre.sep.theta$ (the $ form breaks faceting and is a
# known ggplot2 anti-pattern).
ggplot(pre.sep.theta, aes(Theta_PreSep)) + geom_histogram(bins=25)